import scrapy
from scrapy.selector import Selector
from ..items import NewsItem
from ..spiders import utils_crawler

# Human-readable source-site name ("Qiushi", qstheory.cn) stamped onto
# every scraped item in parse_details; the literal is runtime data and
# must stay in the original language.
source = u'求是网'


class QushiSpider(scrapy.Spider):
    """Spider for qstheory.cn (Qiushi / 求是网).

    Crawls three section index pages, follows every article link found in
    the listing block, and scrapes each article page into a ``NewsItem``.
    """

    name = 'qushi'
    base_domains = 'http://www.qstheory.cn/'
    start_urls = [
        "http://www.qstheory.cn/bwtj/index.htm",
        "http://www.qstheory.cn/qsyw/index.htm",
        "http://www.qstheory.cn/qszq/zywz/index.htm"
    ]

    def parse(self, response):
        """Parse a section index page; yield a Request per linked article.

        :param response: index-page response.
        :yields: ``scrapy.Request`` objects routed to :meth:`parse_details`.
        """
        # response.xpath delegates to the response's own selector, so a
        # separate Selector(response) is unnecessary.
        for href in response.xpath('//div[@class="qs_gailan01"]//a/@href').extract():
            # Listing hrefs may be site-relative; urljoin makes every
            # request URL absolute either way.
            yield scrapy.Request(url=response.urljoin(href),
                                 callback=self.parse_details)

    def parse_details(self, response):
        """Scrape one article page into a populated ``NewsItem``.

        :param response: article-page response.
        :returns: a ``NewsItem`` with title, href, time, content, source
            and image_urls fields filled in.
        """
        item = NewsItem()

        item['title'] = response.xpath('//div[@class="main"]/h1/text()').extract()[0].strip()
        item['href'] = response.url
        item['time'] = utils_crawler.deal_time(
            response.xpath('//div[@class="metadata"]/text()').extract()[0].strip())
        item['content'] = utils_crawler.deal_content(
            response.xpath('//div[@id="Text_area"]//p/text()').extract())
        item['source'] = source
        # BUG FIX: the original xpath lacked the leading '//', so it was a
        # path relative to the document root and never matched any images.
        # Also absolutize the srcs so an images pipeline can fetch them.
        item['image_urls'] = [
            response.urljoin(src)
            for src in response.xpath('//div[@id="Text_area"]//img/@src').extract()
        ]

        return item
